
## Instructions for Replication:

To replicate the tables, figures and numbers in the paper, you can
either run each chunks of this readme of sequentially run the following
scripts (after having installed the required packages):

- script/01_main_model.R
- script/02_main_results.R
- script/03_appendix_results.R

## Package

### Installation

Run the following script to install all the required libraries

``` r
if(!require("pacman", quietly = T)) install.packages("pacman")
pacman::p_load(
  # Data Manipulation & Cleaning
  tidyverse, janitor, fs, glue, scales,
  
  # Data Visualization & Plotting
  ggthemes, ggridges, ggrepel, viridis, patchwork,
  
  # Statistical Modeling & Analysis
  brms, BradleyTerry2, icr,
  
  # Utilities & Performance
  remotes, tictoc
)

# Stan for bayesian models
install.packages("cmdstanr", repos = c("https://mc-stan.org/r-packages/", getOption("repos")))
cmdstanr::install_cmdstan(overwrite = T)

# Bayesian estimation for pairwise comparison
remotes::install_github('davidissamattos/bpcs')
```

### Load packages

``` r
pacman::p_load(
  # Data Manipulation & Cleaning
  tidyverse, janitor, fs, glue, scales,
  
  # Data Visualization & Plotting
  ggthemes, ggridges, ggrepel, viridis, patchwork,
  
  # Statistical Modeling & Analysis
  brms, BradleyTerry2, icr,
  
  # Utilities & Performance
  remotes, tictoc
)

## Colors for visualization
ger_color  <- c(
  "AfD" = "#00ADEF", 
  "CDU/CSU" = "grey60",
  "FDP" = "#ebcc34",# "#ffed00",
  "Greens" = "#46962b",
  "SPD" = "#E2001A",
  "Left" = "#8B1A1A"
)
# knitr::purl("README.Rmd", output = "main_script.R")

if(!dir.exists("model")) dir_create("model")
if(!dir.exists("plot")) dir_create("plot")
```

## Input Data

### Comparison data

The data fed into the model consist of all comparison directly made by
the respondents. The dataset consists in 7 columns:

- outcome: result of the comparison (0: mp1 \< mp2, 1: mp1 \> mp2, 2:
  mp1 == mp2)
- mp_X: ID of MP X
- mp_party_X: Party of MP X
- respondent: Respondent’s ID
- respondent party: Respondent’s party

``` r
model_dt <- read_rds("input/0_respondents_rating.rds") %>%
  glimpse
```

    ## Rows: 22,906
    ## Columns: 7
    ## $ outcome          <dbl> 2, 2, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0,…
    ## $ mp_1             <chr> "A6", "A48", "A6", "A209", "A48", "A422", "A209", "A4…
    ## $ mp_party_1       <fct> cdu, cdu, cdu, cdu, cdu, green, cdu, cdu, spd, green,…
    ## $ mp_2             <chr> "A48", "A6", "A209", "A6", "A422", "A48", "A48", "A20…
    ## $ mp_party_2       <fct> cdu, cdu, cdu, cdu, green, cdu, cdu, cdu, green, spd,…
    ## $ respondent       <chr> "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2"…
    ## $ respondent_party <fct> green, green, green, green, green, green, green, gree…

### MP-Level Data

To validate our measure, we use collected data on representatives from
different sources:

- pageid: MPs’ ID
- name: MPs’ name
- party: MPs’ party
- region: MPs’ region
- constituency: MPs’ consituency (party list if elected on party list)
- gender: MPs’ gender
- faction: MPs’ faction (Sälzer 2020)
- ccs_lr: MPs’ score from Comparative Candidate Survey

``` r
mp_dt <- read_rds("input/mp_dt.rds") %>%
  glimpse()
```

    ## Rows: 729
    ## Columns: 8
    ## $ pageid       <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…
    ## $ name         <chr> "Hans Michelbach", "Patrick Sensburg", "André Berghegger"…
    ## $ party        <chr> "CDU/CSU", "CDU/CSU", "CDU/CSU", "CDU/CSU", "CDU/CSU", "C…
    ## $ region       <chr> "Bayern", "Nordrhein-Westfalen", "Niedersachsen", "Bayern…
    ## $ constituency <chr> "Coburg", "Hochsauerlandkreis", "Osnabrück-Land", "Erding…
    ## $ gender       <chr> "male", "male", "male", "male", "male", "female", "female…
    ## $ faction      <chr> "MIT", NA, NA, "MIT", NA, NA, NA, NA, NA, NA, NA, "MIT", …
    ## $ ccs_lr       <fct> NA, NA, NA, 6, 5, NA, NA, NA, 8, 6, NA, NA, NA, NA, 7, NA…

## Model computation

### Main model

``` r
set.seed(888)
tic("Model training")
model <- bpc(
  data = model_dt, 
  player0 = 'mp_1', 
  player1 = 'mp_2', 
  result_column = 'outcome',
  model_type = "davidson-U",
  cluster = "respondent",
  solve_ties = "none",
  priors = list(prior_lambda_std = 3.0,
                prior_S_std = 3.0,
                prior_U1_std = 3.0),
  show_chain_messages = T,
  seed = 888
)
toc()

write_rds(model, "model/0_main_model.rds")

tic("Parameter extraction")
param <- get_parameters_df(model, n_eff = T, Rhat = T)
toc()
write_rds(param, "data/0_main_params.rds")

main <- param %>% 
  mutate(param_type = str_extract(Parameter, ".*?(?=\\[|$)"), 
                page_id = str_extract(Parameter, "(?<=\\[A)\\d+"), 
                respondent = str_extract(Parameter, "(?<=\\[A\\d{1,4}\\,)\\w+(?=\\])")) %>%
  glimpse()

lambdas <- main %>%
  filter(param_type == "lambda") %>%
  transmute(pageid = as.numeric(page_id), 
                   mean = -Mean, 
                   median = -Median, 
                   low = -HPD_lower, 
                   high = -HPD_higher) %>%
  left_join(mp_dt) %>%
  mutate(party = fct_reorder(party, mean)) %>%
  glimpse()

write_rds(lambdas, "data/0_mp_params.rds")

respondent_bias <- main %>%
  filter(param_type == "U1") %>% 
  transmute(respondent, 
                   pageid = as.numeric(page_id), 
                   mean = -Mean, 
                   median = -Median, 
                   low = -HPD_lower, 
                   high = -HPD_higher) %>%
  left_join(mp_dt) %>%
  mutate(party = fct_reorder(party, mean)) %>%
  glimpse

write_rds(respondent_bias, "data/0_respondent_bias.rds")
```

### Bayesian bradley terry

``` r
tic("Model BT")
model_bt <- bpc(
  data = filter(model_dt, outcome != 2), 
  player0 = 'mp_1', 
  player1 = 'mp_2', 
  result_column = 'outcome',
  model_type = "bt-U",
  cluster = "respondent",
  solve_ties = "none",
  priors = list(prior_lambda_std = 3.0,
                prior_S_std = 3.0,
                prior_U1_std = 3.0),
  show_chain_messages = T,
  seed = 888
)
toc()
write_rds(model_bt, "model/1_model_bt.rds")

tic("Params BT")
param_bt <- get_parameters_df(model_bt, n_eff = T, Rhat = T)
toc()
write_rds(param_bt, "data/1_param_bt.rds")
```

### Frequentist Bradley Terry Model

``` r
bt_data <- model_dt %>%
  select(mp_1, mp_2, outcome) %>%
  filter(outcome != 2) %>%
  mutate(outcome = ifelse(outcome == 0, "win", "loss")) %>%
  count(mp_1, mp_2, outcome)  %>%
  pivot_wider(id_cols = c(mp_1, mp_2), names_from = outcome, values_from = n)  %>%
  mutate(loss = ifelse(is.na(loss), 0, loss), 
         win = ifelse(is.na(win), 0, win)) %>%
  arrange(mp_1) %>%
  mutate(mp_1 = as.factor(mp_1)) %>%
  arrange(mp_2) %>%
  mutate(mp_2 = as.factor(mp_2)) %>%
  glimpse

tic("Frequentist Bradley-Terry")
bt_model_obj <- BTm(cbind(win, loss), mp_1, mp_2, data = bt_data)
toc()
write_rds(bt_model_obj, "model/2_frequentist_bradley_terry.rds")

bt_preds <- BTabilities(bt_model_obj) %>%
  as.data.frame() %>%
  rownames_to_column() %>%
  clean_names() %>%
  transmute(pageid = as.numeric(str_extract(rowname, "\\d+")), 
            mean_bt = -ability, 
            low_bt =  mean_bt - 1.96*s_e,
            high_bt = mean_bt + 1.96*s_e) %>%
  glimpse

write_rds(bt_preds, "data/2_frequentist_preds.rds")
```

## Results

### Load artefacts

``` r
# Raw comparisons
raw_answers <- read_rds("input/0_respondents_rating.rds") %>%
  mutate(
    id1 = as.numeric(str_extract(mp_1, "\\d+")),
    id2 = as.numeric(str_extract(mp_2, "\\d+"))
  ) %>%
  glimpse
```

    ## Rows: 22,906
    ## Columns: 9
    ## $ outcome          <dbl> 2, 2, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0,…
    ## $ mp_1             <chr> "A6", "A48", "A6", "A209", "A48", "A422", "A209", "A4…
    ## $ mp_party_1       <fct> cdu, cdu, cdu, cdu, cdu, green, cdu, cdu, spd, green,…
    ## $ mp_2             <chr> "A48", "A6", "A209", "A6", "A422", "A48", "A48", "A20…
    ## $ mp_party_2       <fct> cdu, cdu, cdu, cdu, green, cdu, cdu, cdu, green, spd,…
    ## $ respondent       <chr> "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2"…
    ## $ respondent_party <fct> green, green, green, green, green, green, green, gree…
    ## $ id1              <dbl> 6, 48, 6, 209, 48, 422, 209, 48, 555, 422, 6, 136, 26…
    ## $ id2              <dbl> 48, 6, 209, 6, 422, 48, 48, 209, 422, 555, 136, 6, 20…

``` r
# Helpers for individual visulation of MPs
targets <- read_rds("input/targets.rds") %>%
  mutate(pageid = as.factor(pageid)) %>%
  glimpse
```

    ## Rows: 41
    ## Columns: 2
    ## $ pageid <fct> 635, 201, 709, 636, 699, 639, 675, 209, 67, 6, 17, 11, 20, 202,…
    ## $ nudge  <dbl> 0.7, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.…

``` r
# Estimates for each individual MP
lambdas <- read_rds("data/0_mp_params.rds") %>%
  mutate(pageid = pageid %>%
           as.factor() %>%
           fct_reorder(mean) %>%
           fct_reorder(as.numeric(party))
  ) %>%
  glimpse
```

    ## Rows: 723
    ## Columns: 12
    ## $ pageid       <fct> 6, 48, 209, 422, 555, 136, 262, 659, 259, 558, 202, 201, …
    ## $ mean         <dbl> 1.340344, 1.459447, 5.566633, -3.750410, -1.606888, 2.317…
    ## $ median       <dbl> 1.342435, 1.457895, 5.566670, -3.746360, -1.606080, 2.319…
    ## $ low          <dbl> 1.815020, 1.970640, 6.392920, -2.957700, -0.843048, 3.144…
    ## $ high         <dbl> 0.864496, 0.945979, 4.718890, -4.583460, -2.331350, 1.475…
    ## $ name         <chr> "Angela Merkel", "Ursula von der Leyen", "Philipp Amthor"…
    ## $ party        <fct> CDU/CSU, CDU/CSU, CDU/CSU, Greens, SPD, CDU/CSU, Left, Af…
    ## $ region       <chr> "Mecklenburg-Vorpommern", "Niedersachsen", "Mecklenburg-V…
    ## $ constituency <chr> "Vorpommern-Rügen – Vorpommern-Greifswald I", "Landeslist…
    ## $ gender       <chr> "female", "female", "male", "female", "male", "male", "fe…
    ## $ faction      <chr> NA, NA, NA, "REAL", NA, "MIT", "KP", "nk", NA, "NB", "Mer…
    ## $ ccs_lr       <fct> NA, 6, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…

``` r
# Respondent specific biases
respondent_bias <- read_rds("data/0_respondent_bias.rds") %>%
  glimpse
```

    ## Rows: 17,352
    ## Columns: 13
    ## $ respondent   <chr> "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2…
    ## $ pageid       <dbl> 6, 48, 209, 422, 555, 136, 262, 659, 259, 558, 202, 201, …
    ## $ mean         <dbl> -0.25347669, -0.49794650, -0.41475720, -0.92818624, -0.48…
    ## $ median       <dbl> -0.26557800, -0.49735050, -0.39977500, -0.87772350, -0.49…
    ## $ low          <dbl> 0.656498, 0.138103, 0.743992, 0.294040, 0.435203, 0.81570…
    ## $ high         <dbl> -1.150050, -1.198090, -1.594930, -2.343250, -1.456110, -1…
    ## $ name         <chr> "Angela Merkel", "Ursula von der Leyen", "Philipp Amthor"…
    ## $ party        <fct> CDU/CSU, CDU/CSU, CDU/CSU, Greens, SPD, CDU/CSU, Left, Af…
    ## $ region       <chr> "Mecklenburg-Vorpommern", "Niedersachsen", "Mecklenburg-V…
    ## $ constituency <chr> "Vorpommern-Rügen – Vorpommern-Greifswald I", "Landeslist…
    ## $ gender       <chr> "female", "female", "male", "female", "male", "male", "fe…
    ## $ faction      <chr> NA, NA, NA, "REAL", NA, "MIT", "KP", "nk", NA, "NB", "Mer…
    ## $ ccs_lr       <fct> NA, 6, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…

### Computed results

### Number of comparisons

``` r
unique_comparisons <- raw_answers %>% filter(id1 > id2)
comparisons_count_by_mp <- raw_answers %>% count(mp_1, sort = T)
sum_stats <- summary(comparisons_count_by_mp$n)
respondent_counts <- raw_answers %>% 
  summarise(n = n()/2, 
            unique_mps = length(unique(mp_1)),
            .by = c(respondent, respondent_party)) %>%
  arrange(desc(n))
sum_stats_respondent = summary(respondent_counts$n)
sum_stats_respondent_unique_mps = summary(respondent_counts$unique_mps)


print(glue("Total number of comparisons: {nrow(unique_comparisons)}"))
```

    ## Total number of comparisons: 11453

``` r
print(glue("{comparisons_count_by_mp$mp_1[1]} (Angela Merkel) was compared {comparisons_count_by_mp$n[1]} times"))
```

    ## A6 (Angela Merkel) was compared 1353 times

``` r
print(glue("On average, each MP was compared {round(sum_stats[['Mean']], 2)} times (Median: {sum_stats[['Median']]})"))
```

    ## On average, each MP was compared 31.68 times (Median: 19)

``` r
print(glue("On average, each respondent compared {round(sum_stats_respondent[['Mean']], 2)} pairs of MPs (Median: {sum_stats_respondent[['Median']]})"))
```

    ## On average, each respondent compared 477.21 pairs of MPs (Median: 489)

``` r
print(glue("On average, each respondent compared {round(sum_stats_respondent_unique_mps[['Mean']], 2)} unique MPs (Median: {sum_stats_respondent_unique_mps[['Median']]})"))
```

    ## On average, each respondent compared 281.21 unique MPs (Median: 321)

### Inter-Coder Reliability - Krippendorff’s alpha

``` r
krippalpha_dt <- raw_answers %>% 
  filter(outcome != 2) %>%
  mutate(
    id1 = as.numeric(str_extract(mp_1, "\\d+")),
    id2 = as.numeric(str_extract(mp_2, "\\d+"))
  ) %>%
  filter(id1 < id2) %>%
  mutate(id = paste(mp_1, mp_2)) %>%
  filter(n() > 2, .by = id) %>%
  select(id, respondent, outcome) %>%
  mutate(outcome = recode(outcome, "0" = -1, "1" = 1, "2" = 0))

print(glue("Krippendorff's alpha computed with {nrow(krippalpha_dt)} comparisons featuring {nrow(count(krippalpha_dt, id))} unique paris"))
```

    ## Krippendorff's alpha computed with 1319 comparisons featuring 318 unique paris

``` r
krippalpha_results <- krippalpha_dt %>%
  pivot_wider(id_cols = id, names_from = respondent, values_from = outcome) %>%
  select(-id) %>%
  as.matrix() %>%
  t() %>%
  krippalpha(data = ., metric = "nominal")

print(glue("Krippendorff's alpha of {round(krippalpha_results[['alpha']], 2)}"))
```

    ## Krippendorff's alpha of 0.77

### External dataset

``` r
ccs_correlation <- cor(as.numeric(lambdas$ccs_lr), lambdas$mean, use = 'complete.obs')

print(glue("The faction, provided by Sältzer (2020), was available for {lambdas %>% filter(!is.na(faction)) %>% nrow()} MPs."))
```

    ## The faction, provided by Sältzer (2020), was available for 230 MPs.

``` r
print(glue("We were able to merge {lambdas %>% filter(!is.na(ccs_lr)) %>% nrow()} from the Comparative Candidate Survey based on respondents party, region and position on the list"))
```

    ## We were able to merge 182 from the Comparative Candidate Survey based on respondents party, region and position on the list

``` r
print(glue("The correlation between our estimates and the Comparative Candidate Survey is {round(ccs_correlation, 3)}"))
```

    ## The correlation between our estimates and the Comparative Candidate Survey is 0.854

### Figure 2: Ideological distribution of MPs in the 19th Bundestag by political party

``` r
lambdas %>% 
  ggplot(aes(x = mean, y = party, fill = party)) + 
  geom_density_ridges(alpha = .7, color = "grey50", quantile_lines = T, quantiles = 2,
                      jittered_points = TRUE,
                      position = position_points_jitter(width = 0.05, height = 0),
                      point_shape = '|', point_size = 4, point_alpha = .9) +
  scale_fill_manual(values = ger_color) +
  guides(fill = "none") +
  labs(x = "Ideological Position", y = "") +
  theme_few()
```

    ## Picking joint bandwidth of 0.576

![](README_files/figure-gfm/unnamed-chunk-11-1.png)<!-- -->

``` r
ggsave(filename = "plot/2_ideological_distribution_of_parties.png", width = 8, height = 5)
```

    ## Picking joint bandwidth of 0.576

### Figure 3: Estimated individual positions for the members of the 19th Bundestag

``` r
label_dt <- lambdas %>% 
  inner_join(targets) %>%
  mutate(
    nudge = case_when(
      # Ralph Brinkhaus, Thomas L. Kemmerich, Wolfgang Kubicki, Katja Suding, Cem Özdemir, Anton Hofreiter, Claudia Roth
      pageid %in% as.numeric(c("636", "699", "399", "404", "372", "427", "406", "431")) ~ 7,
      #Philipp Amthor, Carsten Linnemann, Helge Braun, Heiko Maas, Saskia Esken, Aydan Özoğuz, Jan Korte, Bernd Riexinger, Jürgen Trittin
      pageid %in% as.numeric(c("209", "210", "11", "20", "555", "605", "508", "284", "268", "441")) ~ -7,
      TRUE ~ 7*nudge
    )
  )
```

    ## Joining with `by = join_by(pageid)`

``` r
lambdas %>%
  ggplot(aes(x = pageid, y = mean, color = party)) +
  geom_point(size = .5, show.legend = T) +
  scale_color_manual(values = ger_color) +
  labs(y = "Ideological Position", x = "") +
  geom_text_repel(data = label_dt, aes(label = name), nudge_y = label_dt$nudge, max.iter = 3000, size = 3, force = 10) +
  geom_errorbar(aes(ymin = low, ymax = high), size = .2, alpha = .25, show.legend = F) +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        panel.border = element_blank(),
        legend.position = "none")
```

    ## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
    ## ℹ Please use `linewidth` instead.
    ## This warning is displayed once every 8 hours.
    ## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
    ## generated.

![](README_files/figure-gfm/unnamed-chunk-12-1.png)<!-- -->

``` r
ggsave(filename = "plot/3_individual_positions.png", width = 12, height = 6)
```

### Figure 4: Comparison of legislators belonging to different partisan factions and between sister parties (CDU and CSU)

``` r
lambdas %>% 
  filter(party %in% c("Greens", "SPD", "CDU/CSU", "Left", "AfD")) %>% 
  mutate(faction = case_when(party == "Greens" & faction == "FUNDI" ~ "Greens - Fundamentalist",
                             party == "Greens" & faction == "REAL" ~ "Greens - Realist",
                             party == "SPD" & faction == "SEEH" ~ "SPD - Seeheim Circle",
                             party == "SPD" ~ "SPD - Rest of the Party",
                             party == "CDU/CSU" &  region == "Bayern" ~ "CSU",
                             party == "CDU/CSU" ~ "CDU",
                             T ~ NA_character_)) %>%
  filter(!is.na(faction)) %>%
  mutate(faction = fct_reorder(as.character(faction), mean)) %>%
  ggplot(aes(x = mean, y = faction, fill = party)) + geom_boxplot(alpha = .5, show.legend = F) + 
  scale_fill_manual(values = ger_color) +
  theme_bw() +
  labs(y = "", x = "Ideological Position")
```

![](README_files/figure-gfm/unnamed-chunk-13-1.png)<!-- -->

``` r
ggsave(filename = "plot/4_comparisons_between_factions.png", width = 8, height = 5)
```

### Figure 5: Comparison of expert-based estimates with self-placement

``` r
set.seed(888)
ccs_correlation <- cor(as.numeric(lambdas$ccs_lr), lambdas$mean, use = 'complete.obs')

lambdas %>% 
  filter(!is.na(party) & !is.na(ccs_lr)) %>% 
  mutate(mean = 10*(mean - min(mean))/(max(mean) - min(mean))) %>%
  ggplot(aes(x = mean, y = ccs_lr, color = party)) +
  geom_jitter(height = 0.25) +
  scale_color_manual(values = ger_color) +
  theme_bw() +
  labs(subtitle = glue("Correlation: {round(ccs_correlation, 3)}"), 
       x = "Ideological Position (our estimate)", 
       y = "MPs' self-estimate (CCS)", 
       color = "") +
  theme(plot.background = element_rect(fill = "white", colour = NA), 
        panel.background = element_rect(fill = "white", colour = NA))
```

![](README_files/figure-gfm/unnamed-chunk-14-1.png)<!-- -->

``` r
ggsave(filename = "plot/5_comparison_with_ccs.png", width = 8, height = 5)
```

### Figure 6: Posterior distribution of respondent-specific bias parameters

``` r
custom_boxplot_stats_90 <- function(x) {
  r <- quantile(x, probs = c(0.05, 0.25, 0.5, 0.75, 0.95))
  names(r) <- c("ymin", "lower", "middle", "upper", "ymax")
  return(r)
}

# Function to compute 95% interval statistics for the boxplot
custom_boxplot_stats_95 <- function(x) {
  r <- quantile(x, probs = c(0.025, 0.25, 0.5, 0.75, 0.975))
  names(r) <- c("ymin", "lower", "middle", "upper", "ymax")
  return(r)
}

respondent_bias %>% 
  mutate(user = paste("Resp ", as.character(as.numeric(as.factor(respondent)))) %>% fct_reorder(mean)) %>%
  ggplot(aes(x = mean, y = user)) +
  stat_summary(fun.data = custom_boxplot_stats_95, geom = "boxplot", fatten = 2, width = 0.6, lwd = 0.2, color = "blue", alpha = 0) +
  stat_summary(fun.data = custom_boxplot_stats_90, geom = "boxplot", fatten = 2, width = 0.6, alpha = 0.6) +
  theme_minimal() +
  coord_cartesian(xlim = c(-1.5, 1.5)) +
  labs(x = "Estimated respondent specific bias", y = "Respondents\n") +
  theme(plot.background = element_rect(fill = "white", colour = NA), 
        panel.background = element_rect(fill = "white", colour = NA))
```

![](README_files/figure-gfm/unnamed-chunk-15-1.png)<!-- -->

``` r
ggsave(filename = "plot/6_respondent_bias.png", width = 8, height = 5)
```

## Appendix

### Load additional data for Appendix

``` r
# Timestamps for each individual comparison
comp_timestamps <- read_rds("input/comp_with_timestamps.rds") %>%
  glimpse
```

    ## Rows: 11,453
    ## Columns: 3
    ## $ user      <chr> "green 2", "green 2", "green 2", "green 2", "green 2", "gree…
    ## $ time_diff <dbl> 0.000000, 4.862782, 4.914599, 4.315796, 4.756616, 7.500093, …
    ## $ time      <dbl> 1583857624, 1583857629, 1583857634, 1583857638, 1583857643, …

``` r
# MP's info
mp_dt <- read_rds("input/mp_dt.rds") %>%
  glimpse()
```

    ## Rows: 729
    ## Columns: 8
    ## $ pageid       <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…
    ## $ name         <chr> "Hans Michelbach", "Patrick Sensburg", "André Berghegger"…
    ## $ party        <chr> "CDU/CSU", "CDU/CSU", "CDU/CSU", "CDU/CSU", "CDU/CSU", "C…
    ## $ region       <chr> "Bayern", "Nordrhein-Westfalen", "Niedersachsen", "Bayern…
    ## $ constituency <chr> "Coburg", "Hochsauerlandkreis", "Osnabrück-Land", "Erding…
    ## $ gender       <chr> "male", "male", "male", "male", "male", "female", "female…
    ## $ faction      <chr> "MIT", NA, NA, "MIT", NA, NA, NA, NA, NA, NA, NA, "MIT", …
    ## $ ccs_lr       <fct> NA, NA, NA, 6, 5, NA, NA, NA, 8, 6, NA, NA, NA, NA, 7, NA…

``` r
# Raw parameter from Bayesian Davidson model
dav_bayesian_preds <- read_rds("data/0_main_params.rds") %>%
  mutate(param_type = str_extract(Parameter, ".*?(?=\\[|$)"),
         pageid = as.numeric(str_extract(Parameter, "(?<=\\[A)\\d+"))) %>%
  filter(param_type == "lambda") %>%
  transmute(pageid, mean = -Mean, low = -HPD_lower, high = -HPD_higher, Rhat) %>%
  left_join(mp_dt, by = "pageid") %>%
  mutate(party = fct_reorder(party, mean)) %>%
  glimpse
```

    ## Rows: 723
    ## Columns: 12
    ## $ pageid       <dbl> 6, 48, 209, 422, 555, 136, 262, 659, 259, 558, 202, 201, …
    ## $ mean         <dbl> 1.340344, 1.459447, 5.566633, -3.750410, -1.606888, 2.317…
    ## $ low          <dbl> 1.815020, 1.970640, 6.392920, -2.957700, -0.843048, 3.144…
    ## $ high         <dbl> 0.864496, 0.945979, 4.718890, -4.583460, -2.331350, 1.475…
    ## $ Rhat         <dbl> 1.0039, 1.0019, 1.0009, 1.0038, 1.0022, 1.0008, 1.0011, 1…
    ## $ name         <chr> "Angela Merkel", "Ursula von der Leyen", "Philipp Amthor"…
    ## $ party        <fct> CDU/CSU, CDU/CSU, CDU/CSU, Greens, SPD, CDU/CSU, Left, Af…
    ## $ region       <chr> "Mecklenburg-Vorpommern", "Niedersachsen", "Mecklenburg-V…
    ## $ constituency <chr> "Vorpommern-Rügen – Vorpommern-Greifswald I", "Landeslist…
    ## $ gender       <chr> "female", "female", "male", "female", "male", "male", "fe…
    ## $ faction      <chr> NA, NA, NA, "REAL", NA, "MIT", "KP", "nk", NA, "NB", "Mer…
    ## $ ccs_lr       <fct> NA, 6, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…

``` r
# Raw parameter from Bayesian BT model
bt_bayesian_preds <- read_rds("data/1_param_bt.rds") %>%
  mutate(param_type = str_extract(Parameter, ".*?(?=\\[|$)"),
         pageid = as.numeric(str_extract(Parameter, "(?<=\\[A)\\d+"))) %>%
  filter(param_type == "lambda") %>%
  transmute(pageid, mean_bt = -Mean, low_bt = -HPD_lower, high_bt = -HPD_higher) %>%
  glimpse
```

    ## Rows: 721
    ## Columns: 4
    ## $ pageid  <dbl> 6, 209, 48, 422, 555, 136, 262, 659, 259, 558, 202, 201, 268, …
    ## $ mean_bt <dbl> -0.08957864, 5.73158529, 1.00658781, -5.08998985, -2.44412934,…
    ## $ low_bt  <dbl> 1.14291, 7.21660, 2.25245, -3.79541, -1.10535, 4.38616, -6.488…
    ## $ high_bt <dbl> -1.274890, 4.297650, -0.249981, -6.545960, -3.803940, 1.279320…

``` r
# Raw parameter from Frequentist BT model
bt_freq_preds <- read_rds("data/2_frequentist_preds.rds") %>%
  select(pageid, mean_bt_f = mean_bt, low_bt_f = low_bt, high_bt_f = high_bt) %>%
  glimpse
```

    ## Rows: 721
    ## Columns: 4
    ## $ pageid    <dbl> 1, 10, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 11,…
    ## $ mean_bt_f <dbl> 0.00000000, 0.43669807, 18.65281793, 17.90480761, 1.33593234…
    ## $ low_bt_f  <dbl> 0.00000000, -2.04808495, -6088.93330502, -8535.36658189, -1.…
    ## $ high_bt_f <dbl> 0.0000000, 2.9214811, 6126.2389409, 8571.1761971, 3.7435719,…

``` r
merged_estimates <- dav_bayesian_preds %>%
  left_join(bt_bayesian_preds, by = join_by(pageid)) %>%
  left_join(bt_freq_preds, by = join_by(pageid)) %>%
  glimpse
```

    ## Rows: 723
    ## Columns: 18
    ## $ pageid       <dbl> 6, 48, 209, 422, 555, 136, 262, 659, 259, 558, 202, 201, …
    ## $ mean         <dbl> 1.340344, 1.459447, 5.566633, -3.750410, -1.606888, 2.317…
    ## $ low          <dbl> 1.815020, 1.970640, 6.392920, -2.957700, -0.843048, 3.144…
    ## $ high         <dbl> 0.864496, 0.945979, 4.718890, -4.583460, -2.331350, 1.475…
    ## $ Rhat         <dbl> 1.0039, 1.0019, 1.0009, 1.0038, 1.0022, 1.0008, 1.0011, 1…
    ## $ name         <chr> "Angela Merkel", "Ursula von der Leyen", "Philipp Amthor"…
    ## $ party        <fct> CDU/CSU, CDU/CSU, CDU/CSU, Greens, SPD, CDU/CSU, Left, Af…
    ## $ region       <chr> "Mecklenburg-Vorpommern", "Niedersachsen", "Mecklenburg-V…
    ## $ constituency <chr> "Vorpommern-Rügen – Vorpommern-Greifswald I", "Landeslist…
    ## $ gender       <chr> "female", "female", "male", "female", "male", "male", "fe…
    ## $ faction      <chr> NA, NA, NA, "REAL", NA, "MIT", "KP", "nk", NA, "NB", "Mer…
    ## $ ccs_lr       <fct> NA, 6, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
    ## $ mean_bt      <dbl> -0.08957864, 1.00658781, 5.73158529, -5.08998985, -2.4441…
    ## $ low_bt       <dbl> 1.14291, 2.25245, 7.21660, -3.79541, -1.10535, 4.38616, -…
    ## $ high_bt      <dbl> -1.274890, -0.249981, 4.297650, -6.545960, -3.803940, 1.2…
    ## $ mean_bt_f    <dbl> -1.09306227, -0.56250891, 2.31843993, -3.80841599, -2.571…
    ## $ low_bt_f     <dbl> -2.8344301, -2.3146020, 0.5148811, -5.5772339, -4.3340204…
    ## $ high_bt_f    <dbl> 0.6483056, 1.1895841, 4.1219988, -2.0395981, -0.8083719, …

``` r
# Load the simulation results
sim_results <- read_rds("input/0_simulations_results.rds") %>%
  mutate(
    n_coders = fct_reorder(factor(n_coders), n_coders),
    n_pairs = fct_reorder(factor(glue("{n_pairs} pairs")), n_pairs)
  ) %>%
  glimpse
```

    ## Rows: 1,600
    ## Columns: 11
    ## $ cor        <dbl> 0.9544247, 0.9433311, 0.9561827, 0.8617714, 0.8092624, 0.99…
    ## $ cor_imp    <dbl> 0.9544247, 0.9421983, 0.9561827, 0.8609750, 0.8064667, 0.99…
    ## $ prop_found <dbl> 1.0000000, 0.9985714, 1.0000000, 0.9971429, 0.9914286, 1.00…
    ## $ estimates  <list> [<tbl_df[700 x 6]>], [<tbl_df[700 x 6]>], [<tbl_df[700 x 6…
    ## $ n_coders   <fct> 20, 3, 5, 3, 7, 20, 7, 7, 20, 5, 3, 7, 10, 10, 5, 7, 10, 3,…
    ## $ n_pairs    <fct> 250 pairs, 1000 pairs, 750 pairs, 750 pairs, 250 pairs, 750…
    ## $ sort_units <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
    ## $ random     <lgl> TRUE, FALSE, FALSE, TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, T…
    ## $ prob_error <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
    ## $ seed       <int> 654, 10, 375, 279, 580, 82, 361, 719, 421, 574, 574, 291, 8…
    ## $ file       <chr> "sims/estimates//0069f5e194b225121acd09e9a321c8dc.rds", "si…

``` r
bias_avg_dt <- sim_results %>%
  group_by(random, n_coders, n_pairs, sort_units, prob_error) %>%
  summarise(
    across(c(cor, prop_found), list(mean = mean, sd = sd), na.rm = TRUE),
    n = n(),
    .groups = "drop"
  ) %>%
  mutate(random = ifelse(random, "rand", "algo")) %>%
  pivot_wider(
    id_cols = c(n_coders, n_pairs, sort_units, prob_error),
    names_from = random,
    values_from = c(cor_mean, prop_found_mean, cor_sd, prop_found_sd, n)
  ) %>%
  mutate(
    cor_diff = cor_mean_rand - cor_mean_algo,
    prop_found_diff = prop_found_mean_rand - prop_found_mean_algo,
    cor_se = sqrt(cor_sd_rand^2 / n_rand + cor_sd_algo^2 / n_algo),
    prop_found_se = sqrt(prop_found_sd_rand^2 / n_rand + prop_found_sd_algo^2 / n_algo),
    cor_low = cor_diff - 1.96 * cor_se,
    cor_high = cor_diff + 1.96 * cor_se,
    prop_found_low = prop_found_diff - 1.96 * prop_found_se,
    prop_found_high = prop_found_diff + 1.96 * prop_found_se
  ) %>%
  glimpse
```

    ## Warning: There was 1 warning in `summarise()`.
    ## ℹ In argument: `across(c(cor, prop_found), list(mean = mean, sd = sd), na.rm =
    ##   TRUE)`.
    ## ℹ In group 1: `random = FALSE`, `n_coders = 3`, `n_pairs = 250 pairs`,
    ##   `sort_units = FALSE`, `prob_error = 0`.
    ## Caused by warning:
    ## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
    ## Supply arguments directly to `.fns` through an anonymous function instead.
    ## 
    ##   # Previously
    ##   across(a:b, mean, na.rm = TRUE)
    ## 
    ##   # Now
    ##   across(a:b, \(x) mean(x, na.rm = TRUE))

    ## Rows: 20
    ## Columns: 22
    ## $ n_coders             <fct> 3, 3, 3, 3, 5, 5, 5, 5, 7, 7, 7, 7, 10, 10, 10, 1…
    ## $ n_pairs              <fct> 250 pairs, 500 pairs, 750 pairs, 1000 pairs, 250 …
    ## $ sort_units           <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, …
    ## $ prob_error           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
    ## $ cor_mean_algo        <dbl> 0.5871393, 0.8203673, 0.9023839, 0.9497982, 0.758…
    ## $ cor_mean_rand        <dbl> 0.5727700, 0.7834164, 0.8641012, 0.9062837, 0.740…
    ## $ prop_found_mean_algo <dbl> 0.8751071, 0.9913929, 0.9995000, 0.9999643, 0.975…
    ## $ prop_found_mean_rand <dbl> 0.8640357, 0.9868929, 0.9981071, 0.9998214, 0.971…
    ## $ cor_sd_algo          <dbl> 0.0453082037, 0.0137718562, 0.0104001516, 0.00512…
    ## $ cor_sd_rand          <dbl> 0.0319015248, 0.0172732013, 0.0112348272, 0.00894…
    ## $ prop_found_sd_algo   <dbl> 0.0101112985, 0.0032105188, 0.0007621337, 0.00022…
    ## $ prop_found_sd_rand   <dbl> 0.0080869652, 0.0036007950, 0.0015645036, 0.00047…
    ## $ n_algo               <int> 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 4…
    ## $ n_rand               <int> 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 4…
    ## $ cor_diff             <dbl> -0.014369279, -0.036950909, -0.038282729, -0.0435…
    ## $ prop_found_diff      <dbl> -0.0110714286, -0.0045000000, -0.0013928571, -0.0…
    ## $ cor_se               <dbl> 0.0087614791, 0.0034929483, 0.0024206636, 0.00163…
    ## $ prop_found_se        <dbl> 0.00204717710, 0.00076277709, 0.00027515991, 0.00…
    ## $ cor_low              <dbl> -0.031541778, -0.043797088, -0.043027230, -0.0467…
    ## $ cor_high             <dbl> 0.002803220, -0.030104730, -0.033538229, -0.04031…
    ## $ prop_found_low       <dbl> -0.0150838957, -0.0059950431, -0.0019321706, -0.0…
    ## $ prop_found_high      <dbl> -0.00705896146, -0.00300495691, -0.00085354371, 0…

### A - Comparison pace

``` r
median_time_per_comp <- comp_timestamps %>%
  summarise(median_time_diff = median(time_diff, na.rm = TRUE)) %>%
  round(1)

total_time <- comp_timestamps %>%
  # Removing any interval larger than 10 minutes
  filter(time_diff < 10*60) %>%
  summarise(total_time = sum(time_diff, na.rm = TRUE) / 60, .by = user) %>%
  summarise(total_mean_time = mean(total_time)) %>%
  round(1)

print(glue("The median time required to complete a comparison was {median_time_per_comp} seconds. Completing the survey required around {total_time} minutes."))
```

    ## The median time required to complete a comparison was 12.8 seconds. Completing the survey required around 153.1 minutes.

### Figure A2 - Comparison over time

``` r
comp_timestamps %>%
  mutate(
    time = (time - min(time))/60/60,
    longer = time > 24,
    time = ifelse(longer, 24, time),
    .by = user
  ) %>%
  ggplot(aes(x =  user, y = time, colour = longer, shape = longer)) +
  geom_point(show.legend = F) +
  coord_flip() +
  theme_bw() +
  scale_colour_manual(values = c("black", "red")) +
  labs(y = "Time elapsed since the first comparison (in hours)", x = "Respondent")
```

![](README_files/figure-gfm/unnamed-chunk-17-1.png)<!-- -->

``` r
ggsave(filename = "plot/A2_comparaison_over_time.png", width = 8, height = 5)
```

### Results of the simulations

#### Figure B3: Raw results of the simulations

``` r
sim_results %>%
  mutate(sort_units = ifelse(sort_units, "Sorted", "Non-sorted"),
         random = ifelse(random, "Random", "Custom algorithm")) %>%
  ggplot(aes(x = cor, prop_found, color = n_pairs)) +
  geom_point(size = .5, alpha = .4) +
  facet_grid(n_coders~random) +
  scale_color_viridis(discrete = T, direction = -1, begin = 0, end = .9) +
  theme_minimal() +
  labs(x = "Correlation with Ground Truth", y = "% of recovered MPs", color = "") +
  scale_y_continuous(labels = percent) +
  theme(plot.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA))
```

![](README_files/figure-gfm/unnamed-chunk-18-1.png)<!-- -->

``` r
ggsave(filename = "plot/B3_sim_raw.png", width = 8, height = 5)
```

#### Figure B4: Performance of the custom algorithm against the random exploration

``` r
bias_avg_dt %>%
  ggplot(aes(x = cor_diff, y = prop_found_diff, color = n_coders)) +
  geom_hline(yintercept = 0, linetype = 4, color = "grey70") +
  geom_vline(xintercept = 0, linetype = 4, color = "grey70") +
  geom_pointrange(aes(xmin = cor_low, xmax = cor_high), size = .03) +
  geom_errorbar(aes(ymin = prop_found_low, ymax = prop_found_high)) +
  facet_wrap(~n_pairs) +
  labs(
    x = "\nCorrelation with GT (Random - Algorithm)\n[Negative means algorithm is better correlated with ground truth.]",
    y = "% recovered units (Random - Algorithm)\n[Negative means algorithm recovers more units.]\n",
    color = "Number of simulated coders"
  ) +
  scale_color_viridis(discrete = T, direction = -1) +
  theme_minimal() +
  theme(plot.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA))
```

![](README_files/figure-gfm/unnamed-chunk-19-1.png)<!-- -->

``` r
ggsave(filename = "plot/B4_sim_agg.png", width = 8, height = 5)
```

### Figure C5 - Convergence and Gelman-Rubin R-hat statistics

``` r
merged_estimates %>%
  ggplot(aes(x = Rhat)) +
  geom_histogram(bins = 43 ) +
  theme_bw() +
  labs(x = "\nGelman-Rubin R-hat statistics", y = "Number of parameter\n")
```

![](README_files/figure-gfm/unnamed-chunk-20-1.png)<!-- -->

``` r
ggsave(filename = "plot/C5_rhat.png", width = 8, height = 5)
```

### Appendix D - Davidson vs. Bradley-Terry model

``` r
cor_dav_bt <- round(cor(merged_estimates$mean, merged_estimates$mean_bt,use = "complete.obs"), 2)
print(glue("Correlation between Davidson and Bradley-Terry model: {cor_dav_bt}"))
```

    ## Correlation between Davidson and Bradley-Terry model: 0.98

#### Figure D6: Comparison between estimates from a Davidson and a Bradley-Terry model.

``` r
merged_estimates %>%
  ggplot(aes(x = mean,  y = mean_bt, color = party)) +
  geom_point() +
  facet_wrap(~party, scales = "free") +
  theme_minimal() +
  scale_color_manual(values = ger_color) +
  guides(color = "none") +
  labs(x = "\nDavidson model\n(including 'similar positions')", y = "\nBradley-Terry model\n(excluding 'similar positions')") +
  theme(plot.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA))
```

    ## Warning: Removed 2 rows containing missing values or values outside the scale range
    ## (`geom_point()`).

![](README_files/figure-gfm/unnamed-chunk-22-1.png)<!-- -->

``` r
ggsave(filename = "plot/D6_comp_davidson_bt.png", width = 8, height = 5)
```

    ## Warning: Removed 2 rows containing missing values or values outside the scale range
    ## (`geom_point()`).

#### Figure D7: Width of credible intervals across models.

``` r
merged_estimates %>%
  mutate(unc = low - high,
         unc_bt =  low_bt - high_bt) %>%
  ggplot(aes(x = unc,  y = unc_bt, color = party)) +
  geom_point() +
  facet_wrap(~party) +
  theme_minimal() +
  scale_color_manual(values = ger_color) +
  coord_equal(xlim = c(0, 10), ylim = c(0, 10)) +
  guides(color = "none") +
  geom_abline(slope = 1, linetype = 4, color = "grey60") +
  labs(x = "\nDavidson model\n(including 'similar positions')", y = "\nBradley-Terry model\n(excluding 'similar positions')") +
  theme(plot.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA))
```

    ## Warning: Removed 2 rows containing missing values or values outside the scale range
    ## (`geom_point()`).

![](README_files/figure-gfm/unnamed-chunk-23-1.png)<!-- -->

``` r
ggsave(filename = "plot/D7_width_credible_intervals.png", width = 8, height = 5)
```

    ## Warning: Removed 2 rows containing missing values or values outside the scale range
    ## (`geom_point()`).

### Appendix D: Bayesian vs. Frequentist

#### Figure D8: Distribution of estimates across Bayesian and Frequentist frameworks

``` r
p1 <- merged_estimates %>%
  select(pageid, mean, low, high) %>%
  ggplot(aes(x = mean)) +
  geom_histogram() +
  theme_minimal() +
  labs(y = "Davidson model\n(Bayesian)", x = "Estimates") +
  theme(plot.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA))

p2 <- merged_estimates %>%
  select(pageid, mean_bt_f) %>%
  ggplot(aes(x = mean_bt_f)) +
  geom_histogram() +
  theme_minimal() +
  labs(y = "Bradley-Terry model\n(Frequentist)", x = "Estimates") +
  theme(plot.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA))

p1/p2
```

    ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
    ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

    ## Warning: Removed 2 rows containing non-finite outside the scale range
    ## (`stat_bin()`).

![](README_files/figure-gfm/unnamed-chunk-24-1.png)<!-- -->

``` r
ggsave(filename = "plot/D8_benchmark_freq_hist.png", width = 8, height = 5)
```

    ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
    ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

    ## Warning: Removed 2 rows containing non-finite outside the scale range
    ## (`stat_bin()`).

#### Figure D9: Comparison between estimates from the Davidson model and the frequentist Bradley-Terry model

``` r
merged_estimates %>%
  ggplot(aes(x = mean,  y = mean_bt_f, color = party)) +
  geom_point() +
  facet_wrap(~party, scales = "free") +
  theme_minimal() +
  scale_color_manual(values = ger_color) +
  guides(color = "none") +
  labs(x = "\nDavidson model\n(including 'similar positions')", y = "\nBradley-Terry model\n(excluding 'similar positions')") +
  theme(plot.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA))
```

    ## Warning: Removed 2 rows containing missing values or values outside the scale range
    ## (`geom_point()`).

![](README_files/figure-gfm/unnamed-chunk-25-1.png)<!-- -->

``` r
ggsave(filename = "plot/D9_benchmark_freq_est.png", width = 8, height = 5)
```

    ## Warning: Removed 2 rows containing missing values or values outside the scale range
    ## (`geom_point()`).

### Figure E11 - Gender

``` r
merged_estimates %>%
  drop_na(gender) %>%
  transmute(
    mean,
    party,
    gender = fct_reorder(as.character(gender), mean),
    party_sex = paste(party, gender) %>%
      fct_reorder(as.numeric(as.factor(party)))
  ) %>%
  ggplot(aes(x = party_sex, y = mean, group = party_sex, fill = party)) +
  geom_boxplot(aes(alpha = gender), show.legend = F, outlier.alpha = 0) +
  scale_fill_manual(values = ger_color) +
  theme_bw() +
  coord_flip() +
  guides(fill = F) +
  labs(y = "Ideological Position", x = "")
```

    ## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
    ## of ggplot2 3.3.4.
    ## This warning is displayed once every 8 hours.
    ## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
    ## generated.

    ## Warning: Using alpha for a discrete variable is not advised.

![](README_files/figure-gfm/unnamed-chunk-26-1.png)<!-- -->

``` r
ggsave(filename = "plot/E11_gender.png", width = 8, height = 5)
```

    ## Warning: Using alpha for a discrete variable is not advised.

### Figure 12: Direct Mandate

``` r
merged_estimates %>%
  filter(!party %in% c("FDP")) %>%
  drop_na(constituency) %>%
  transmute(
    mean,
    party,
    direct = ifelse(constituency != "Landesliste", "Constituency", "Party list") %>%
      fct_reorder(mean),
    party_direct = paste(party, direct) %>%
      fct_reorder(as.numeric(as.factor(party)))
  ) %>%
  ggplot(aes(x = party_direct, y = mean, alpha = direct, group = party_direct, fill = party)) +
  geom_boxplot(aes(alpha = direct), show.legend = F, outlier.alpha = 0) +
  scale_fill_manual(values = ger_color) +
  scale_color_viridis(discrete = T) +
  theme_bw() +
  coord_flip() +
  guides(fill = "none") +
  labs(y = "Ideological Position", x = "", fill = "")
```

    ## Warning: Using alpha for a discrete variable is not advised.

![](README_files/figure-gfm/unnamed-chunk-27-1.png)<!-- -->

``` r
ggsave(filename = "plot/E12_constituency_vs_list_mps.png", width = 8, height = 5)
```

    ## Warning: Using alpha for a discrete variable is not advised.

### Figure 13: East-West comparison

``` r
ost <- c("Mecklenburg-Vorpommern", "Brandenburg", "Sachsen", "Thüringen", "Sachsen-Anhalt", "Berlin")

merged_estimates %>%
  mutate(east = ifelse(region %in% ost, "- East", "- West")) %>%
  mutate(faction = paste(party, east, glue("({n()})")), .by = c(party, east)) %>%
  filter(!is.na(faction)) %>%
  mutate(faction = fct_reorder(faction, as.numeric(party))) %>%
  ggplot(aes(x = mean, y = faction, fill = party)) +
  geom_boxplot(show.legend = F, outlier.alpha = 0) +
  scale_fill_manual(values = ger_color) +
  theme_bw() +
  labs(y = "", x = "Ideological Position")
```

![](README_files/figure-gfm/unnamed-chunk-28-1.png)<!-- -->

``` r
ggsave(filename = "plot/E13_east_divide.png", width = 8, height = 5)
```

## End
